/*
  Copyright 2006-2012 Patrik Jonsson, sunrise@familjenjonsson.org

  This file is part of Sunrise.

  Sunrise is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 3 of the License, or
  (at your option) any later version.

  Sunrise is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Sunrise.  If not, see <http://www.gnu.org/licenses/>.

*/

/// \file
/// Contains the function for doing the iterative calculation of the
/// equilibrium radiation field in the presence of self-absorption.

#ifndef __equilibrium__
#define __equilibrium__

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <numeric>
#include <utility>
#include <vector>

#include "boost/lexical_cast.hpp"

#include "xfer.h"
#include "shoot.h"
#include "full_sed_emergence.h"
#include "mcrx-debug.h"

namespace mcrx {
  /// Iteratively determines the equilibrium radiation intensity in the
  /// grid cells when dust self-absorption is included. See the
  /// definition below for full parameter documentation.
  /// \return true if the calculation was terminated early by the
  /// terminator, false if the iteration converged normally.
  template<typename T_dust_model, typename T_absgrid, typename T_emgrid,
	   typename T_emission >
  bool
  determine_dust_equilibrium_intensities
  (
   T_dust_model& model,
   T_absgrid& absgrid,
   T_emgrid& emgrid,
   T_emission& emission,
   const typename T_dust_model::T_lambda& lambda,
   const typename T_dust_model::T_biaser& biaser,
   array_2& radiation_intensity,
   /** The state vector which is used to initialize the
       random number generators in the threads.  Must
       contain n_threads entries. */
   std::vector<T_rng::T_state>& rng_states,
   long n_rays_desired,
   int n_threads,
   bool bind_threads,
   int bank_size,
   T_float tolerance,
   T_float lum_percentile,
   T_float i_min,
   T_float i_max,
   const terminator& term,
   bool check_energy
   );

  // it's a sad state of affairs that this is easier than writing a
  // lambda and figuring out why it doesn't work...

  /// Orders (cell index, (luminosity, stdev)) entries by *decreasing*
  /// luminosity. Must be inline: this function is defined in a header,
  /// and a non-inline definition would violate the one-definition rule
  /// when the header is included from multiple translation units.
  inline bool lumcomparer(const std::pair<int, std::pair<T_float, T_float> >& a,
			  const std::pair<int, std::pair<T_float, T_float> >& b) {
    return a.second.first > b.second.first; }
  /// Orders (cell index, (luminosity, stdev)) entries by increasing
  /// cell index. Must be inline: defined in a header, so a non-inline
  /// definition would violate the one-definition rule when included
  /// from multiple translation units.
  inline bool cellcomparer(const std::pair<int, std::pair<T_float, T_float> >& a,
			   const std::pair<int, std::pair<T_float, T_float> >& b) {
    return a.first < b.first; }
  class noise_ratio_pred {
    T_float limit;
  public:
    noise_ratio_pred(T_float l) : limit(l) {};
    bool operator()(const std::pair<int, std::pair<T_float, T_float> >& a) const {
      return a.second.second/a.second.first > limit; };
  };
}


/** Iterates to convergence to determine the dust temperature
    distribution when self-absorption is included. This involves
    repeatedly calculating the dust temperature, how much extra dust
    emission this implies, and then running a ray tracing step to
    determine how this emission in turn heats other dust. The output
    is the radiation_intensity array, which contains the radiation
    intensity due to dust emission in the grid cells.  Memory use can
    be very large in this function, because there are several arrays
    that are of size number of cells * number of wavelengths. For this
    reason, care must be taken to minimize temporaries.
    \return true if the ray tracing was terminated early by the
    terminator (the calculation is then in a consistent but
    unconverged state), false if the iteration converged normally. */
template<typename T_dust_model, typename T_absgrid, typename T_emgrid,
	 typename T_emission >
bool
mcrx::determine_dust_equilibrium_intensities
(
 /** The dust model which keeps the scatterers. Note that the dust
     model must be a class derived from grain_model so that it has
     functionality for calculating grain temperatures. */
 T_dust_model& model,
 /** The top-level grid object dealing with absorption. */
 T_absgrid& absgrid,
 /** The top-level grid dealing with grid_cell emission. */
 T_emgrid& emgrid,
 /** The emission, which should be the *primary* emission, not an
     emission object that contains grid_cell emitters, which are
     solved by energy conservation. */
 T_emission& emission,
 /** The wavelength vector used. Note that it is assumed that the dust
     model has already been set to this wavelength vector. */
 const typename T_dust_model::T_lambda& lambda,
 /** The biaser supplied to the xfer object. */
 const typename T_dust_model::T_biaser& biaser,
 /** The output radiation intensity in the cells after equilibrium has
     been determined. */
 array_2& radiation_intensity,
 /** The state vector which is used to initialize the
     random number generators in the threads.  Must
     contain n_threads entries. */
 std::vector<T_rng::T_state>& rng_states,
 /// The number of rays to shoot in the ray tracing stages.
 long n_rays_desired,
 /// The number of threads to use.
 int n_threads,
 /// If true, the threads are bound to the cpus.
 bool bind_threads,
 /// The thread bank size.
 int bank_size,
 /** The tolerance for determining when the iteration has
     converged. Since the calculation works by emitting the delta of
     the cell luminosities in each iteration, the luminosity in the
     grid will decrease over time as luminosity escapes the
     volume. When the dust luminosity in the grid has decreased to
     tolerance*L_initial, the iteration stops. */
 T_float tolerance,
 /** The cumulative luminosity percentile for cells included in the
     tolerance calculation. (Actually 1-that, such that 0.1 means that
     the cells comprising 90% of the luminosity are included.) */
 T_float lum_percentile,
 /** The intensity below which the ray is subject to Russian Roulette. */
 T_float i_min,
 /** The intensity above which the rays are split. */
 T_float i_max,
 /// Polled during SED calculation and shooting for early termination.
 const terminator& term,
 /// NOTE(review): currently unused in this function -- confirm intent.
 bool check_energy
 )
{
  using namespace std;
  using namespace blitz;

  typedef typename T_dust_model::T_lambda T_lambda;
  typedef typename T_dust_model::T_biaser T_biaser;
  typedef scatter_shooter T_shooter;
  typedef emergence<T_lambda> T_emergence;

  const T_float area_factor = absgrid.area_factor();

  cout << "Calculating dust equilibrium temperatures\n";

  // This vector keeps the same pointers to scatterers as the dust
  // model, but these are grain_model* and are used for the SED
  // calculation. 
  std::vector<grain_model<polychromatic_scatterer_policy, 
			  mcrx_rng_policy>*> grain_models;
  for(int i=0; i< model.n_scatterers(); ++i) {
    grain_models.push_back
      (dynamic_cast<grain_model<polychromatic_scatterer_policy, 
				mcrx_rng_policy>*>(&model.get_scatterer(i)));
    assert(grain_models.back());
  }

  // We use an empty emergence object, since when just calculating
  // the intensities we are not using any cameras.
  T_emergence eme;

  radiation_intensity = 0;

  const int nc = absgrid.n_cells();
  const int nc_tot = mpi_reduce(nc, std::plus<int>());

  // save the volumes of the cells and the opacities for use in
  // convergence calculations
  array_1 volume(nc);

  // save the densities of the grid cells in a compact array
  array_2 densities(nc, model.zero_density().size());
  {
    int c=0;
    for (typename T_absgrid::const_iterator cc=absgrid.begin(); 
	 cc!=absgrid.end(); ++cc, ++c) {
      volume(c)= cc.volume();
      densities(c, Range::all())= 
	cc->data()->get_absorber().densities();
    }
  }

  // Calculate the amount of absorbed luminosity due to primary heating.
  const int nreal=10;
  T_float tot_lum = 0;
  array_1 new_abs(nc);
  array_1 mean_abs(nc), S_abs(nc), temp(nc);
  // convergence history: per-cell data, # of unconverged cells, and
  // # of rays used in previous iterations
  vector<pair<int, pair<T_float, T_float> > > old_celldata;
  vector<int> old_nc_notconv;
  vector<int> old_nrays;

  bool converged = false;
  int iter=1;

  while(!converged) {
    cout << "\nIteration " << iter << endl;
     
    // The dust SED calculation relies on the intensities being in
    // physical units, not simulation units, so we must take care to
    // convert dust_intensity to the same units as heating_intensity.
     
    // Ask the object containing the grid_cell_emission objects (the
    // ir_grid) to calculate the emission SEDs of the cells based on
    // the current estimate of the radiation field. We pass the
    // radiation field
    emgrid.calculate_SED(radiation_intensity, 
			 grain_models, term, n_threads, 0,
			 false, true);

    cout << "Grid luminosity in this iteration: " << 
      mpi_reduce(emgrid.total_luminosity(), std::plus<T_float>()) << "\n";

    // If we have (apparent) energy nonconservation during the SED
    // calculation, print a warning. This should NEVER happen.
    if( (iter>1) && 
	(mpi_reduce(emgrid.total_luminosity(), std::plus<T_float>()) > 
	mpi_reduce(tot_lum, std::plus<T_float>())*1.001)) {
      cerr << "Fatal: luminosity nonconservation in SED calculation by " << emgrid.total_luminosity()/tot_lum-1 <<  "\n" << endl;
      cerr << "This should seriously never happen. I mean never ever. This means a bug!\n";
      ofstream lumdump("lumdump.txt");
      for(int i=0;i<new_abs.size();++i)
	lumdump << i << '\t' << new_abs(i) << '\t' << emgrid.distribution().weight(i) << '\n';
      lumdump.close();
      throw 0;
    }

    // now shoot so we get an updated estimate of the radiation
    // intensity. We must reset the grid first so the old intensities
    // are erased.
    cout << "Shooting" << endl;
     
    // START SHOOT
    assert (n_threads == static_cast<int>(rng_states.size())); 
     
    // The xfer rngs are loaded in the thread shooting function.
    mcrx_rng_policy rng;
     
    const bool immediate_reemission = (iter>1);
    xfer<T_dust_model, T_absgrid> x 
      (absgrid, emission, eme, model, rng, biaser, 
       i_min, i_max, true, immediate_reemission);
    if(immediate_reemission)
      cout << "Using immediate reemission\n";
    else
      cout << "Not using immediate reemission\n";

    // we shoot several times, adding up the result, to get a handle
    // on the variance
    new_abs=0;
    for(int realization=0; realization < nreal; ++realization) {

      long n_rays=0;
      absgrid.reset();
      model.reset();
      radiation_intensity = 0;

      // XXX is this really ok? the intensities are normalized during
      // shooting to be a correct estimate based on the number of
      // rays. So doing it N times will give an estimate which is N*
      // what it should be. Don't we have to divide that out?
      if( shoot (x, T_shooter(), term, rng_states, 
		 n_rays_desired/nreal, n_rays, n_threads, bind_threads,
		 bank_size, 11, 
		 string("equilibrium_iter_")+boost::lexical_cast<string>(iter)) )
	// we got termination, return true and stop
	// we are in a consistent state, so we can return here
	return true;
     
      DEBUG(1,cout << "Dust_model absorbed luminosity this iteration: "  \
	    << integrate_quantity(model.absorbed(), lambda, false) << endl;);

      // calculate the absorption rate in the cells from this
      // realization's intensity estimate. (absgrid was reset above, so
      // this is a fresh per-realization sample, not a cumulative
      // estimate.)
      for (int c=0; c<nc; ++c) {
	new_abs(c) = 
	  integrate_quantity(absgrid.intensities()(c, Range::all())*
			     model.total_abs_length_to_absorption(densities(c, Range::all())),
			     lambda, false);
      }

      // calculate running mean/variance with Welford's online
      // algorithm (Knuth TAOCP vol 2, 3rd edition, page 232). The
      // divisor is the 1-based sample count, realization+1; dividing
      // by realization (as previously done) drops the first
      // realization from the mean and corrupts the variance.
      if(realization==0) {
	mean_abs = new_abs;
	S_abs = 0;
      }
      else {
	temp = new_abs-mean_abs;
	mean_abs += (new_abs-mean_abs)/(realization+1);
	S_abs += temp*(new_abs-mean_abs);
      }

      // update dust radiative intensity
      radiation_intensity += absgrid.intensities();
    }
    // convert S to stdev, and mean to total. Note that S_abs is
    // the population variance of the realizations, which is equal to
    // the variance of the sum because the 1/N^2 form the variance of
    // the mean and the N^2 from multiplying the mean by N cancel.
    S_abs = sqrt(S_abs/(nreal-1));
    mean_abs *= nreal;

    // convert the intensity to the correct units, and take the mean
    radiation_intensity = radiation_intensity/
      (4*constants::pi*volume(tensor::i)*area_factor*nreal);
    
    cout << "Testing whether converged...  " << endl;

    /* The convergence check only runs on Task 0. The other tasks just
       send their data.*/

    if(!is_mpi_master()) {
      mpi_stack_arrays(mean_abs, firstDim);
      mpi_stack_arrays(S_abs, firstDim);
    }
    else {

      // let's just assemble the full data across all cells here. If we
      // have such large runs that we can't store 2-3 vectors of the
      // total cell number then we'll have to make it more complicated.

      vector<pair<int, pair<T_float, T_float> > > celldata(nc_tot);
      {
	const array_1 mean_abs_all = mpi_stack_arrays(mean_abs, firstDim);
	const array_1 stdev_abs_all = mpi_stack_arrays(S_abs, firstDim);
	for(int c=0; c<nc_tot; ++c) {
	  celldata[c].first=c;
	  celldata[c].second.first=mean_abs_all(c);
	  celldata[c].second.second=stdev_abs_all(c);
	}
      }

      // sort it by decreasing luminosity
      sort(celldata.begin(), celldata.end(), lumcomparer);
      //[](auto a, auto b) {return a.second.first < b.second.first; });

      // partial sum of luminosities
      vector<T_float> cell_cumlum(nc_tot);
      transform(celldata.begin(), celldata.end(), cell_cumlum.begin(),
		bind(&pair<T_float, T_float>::first, 
		     bind(&pair<int, pair<T_float, T_float> >::second,
			  _1)));
      partial_sum(cell_cumlum.begin(), cell_cumlum.end(), cell_cumlum.begin());
      tot_lum = cell_cumlum.back();
      const T_float cum_frac_limit = 1-lum_percentile;
      // To converge, we demand that the individual cells have a stdev
      // small enough that the convergence limit is at this many sigma.
      const T_float nsigma_tolerance = 3;
      const T_float cell_noise_tolerance = tolerance/nsigma_tolerance;
      int nc_relevant = 
	std::lower_bound(cell_cumlum.begin(), cell_cumlum.end(), 
			 tot_lum*cum_frac_limit) - cell_cumlum.begin();
      if(nc_relevant==0)
	nc_relevant=1;
      assert(nc_relevant<=nc_tot);

      // The processed range includes one cell beyond the percentile,
      // but it must never extend past celldata.end(), which would be
      // undefined behavior when nc_relevant==nc_tot.
      const int nc_sorted = std::min(nc_relevant+1, nc_tot);

      cout << "Absorbed luminosity this iteration: " << tot_lum << endl;
      cout << cum_frac_limit << " percentile of luminosity contains " 
	   << nc_relevant << " cells\n";

      // sort relevant cells back into cell order
      sort(celldata.begin(), celldata.begin()+nc_sorted, cellcomparer);


      // count how many of the relevant cells have low signal/noise.
      noise_ratio_pred p(cell_noise_tolerance);
      const int nc_noisy = 
	count_if(celldata.begin(), celldata.begin()+nc_sorted, p);
      cout << nc_noisy << " cells have S/N < " << 1./cell_noise_tolerance << endl;

      // if debugging, we dump the arrays for this iteration to a file.
      const bool dump_cells=true;
      ofstream out;
      if(dump_cells)
	out.open(("ircalc_iter"+boost::lexical_cast<string>(iter)+".txt").c_str());

      // now we also need to evaluate whether the iteration has
      // converged. To do this we evaluate the change from last
      // iteration. But this is complicated by the fact that the set of
      // cells included in the percentile will likely not be exactly the
      // same.
      int nc_notconv=0;
      int nc_converging=0;
      int nc_common=0;
      int nc_noisy2=0;
      T_float max_change=0;
      for(int c=0, oc=0; 
	  c<nc_relevant && oc<static_cast<int>(old_celldata.size());) {
	if(celldata[c].first==old_celldata[oc].first) {
	  // we are looking at the same cell
	  ++nc_common;

	  // we want the mean of the ratio of the absorption in this and
	  // last iter. The expectation value and variance of a ratio is
	  // not just <a>/<b>. From
	  // http://www.stat.cmu.edu/~hseltman/files/ratio.pdf:
	
	  // expectation value of ratio <a/b> ~ <a>/<b> + s(b)^2<a>/<b>^3
	  const T_float abs_ratio = 
	    celldata[c].second.first/old_celldata[oc].second.first +
	    old_celldata[oc].second.second*old_celldata[oc].second.second*
	    celldata[c].second.first/
	    pow(old_celldata[oc].second.first,3);
	  max_change = std::max(max_change, abs_ratio);

	  // the variance in the ratio is 
	  // Var(a/b) ~ <a>^2/<b>^2*(s(a)^2/<a>^2 +s(b)^2/<b>^2)
	  const T_float var_ratio = 
	    celldata[c].second.second*celldata[c].second.second/
	    (old_celldata[oc].second.first*old_celldata[oc].second.first) +

	    celldata[c].second.first*celldata[c].second.first*
	    old_celldata[oc].second.second*old_celldata[oc].second.second/
	    pow(old_celldata[oc].second.first,4);
	  const T_float sigma_ratio=sqrt(var_ratio);

	  // if the noise in the ratio is larger than the
	  // tolerance, we will never converge. So in that case we need
	  // to increase number of rays.
	  if(sigma_ratio>tolerance/nsigma_tolerance)
	    ++nc_noisy2;

	  // if the change is larger than the noise, we are presumably
	  // still in the convergence regime
	  const T_float significance = abs(abs_ratio-1.0)/sigma_ratio;
	  if(significance>3)
	    ++nc_converging;

	  // if the change is greater than the tolerance, the cell is
	  // not converged. However, because we are 
	  if(abs(abs_ratio-1.0)>tolerance)
	    ++nc_notconv;

	  if(dump_cells)
	    out << celldata[c].first 
		<< "\t" << celldata[c].second.first 
		<< '\t' << celldata[c].second.second
		<< "\t" << abs_ratio
		<< '\t' << sigma_ratio
		<< '\t' << volume(celldata[c].first) << endl;

	  ++c; ++oc;
	}
	else {
	  // we are not looking at the same cell. increment the appropriate counter
	  if(celldata[c].first<old_celldata[oc].first)
	    ++c;
	  else
	    ++oc;
	}
      }

      // the number of expected cells outside the tolerance just based
      // on the variance. (1-erf(n/sqrt(2) is the probability of being > n
      // sigma away in a Gaussian.)
      const int nc_notconv_exp = ceil((1-erf(nsigma_tolerance/sqrt(2)))*nc_common);

      cout << nc_common << " cells are common with last iteration\n" 
	   << nc_noisy2 << " cells have noise in the ratio outside tolerance\n"
	   << nc_converging << " cells appear to still be converging (largest change: " << max_change << ")\n"
	   << nc_notconv << " cells are varying outside tolerance (" 
	   << nc_notconv_exp << " expected at " << nsigma_tolerance << " sigma)\n";

      // save nrays before we potentially increase
      old_nrays.push_back(n_rays_desired);
    
      if(nc_noisy<nc_notconv_exp && nc_noisy2<nc_notconv_exp) {
	cout << "MC uncertainty is sufficiently small\n";
	// test if we are making progress on the number of not converged
	// cells. Do this only if we have not increased # of rays in the
	// last 2 iters. (The && short-circuits, so the history vectors
	// are only indexed when long enough.)
	if(old_nc_notconv.size()>3 &&
	   old_nc_notconv[old_nc_notconv.size()-2]<2*nc_notconv &&
	   old_nrays[old_nrays.size()-3]==n_rays_desired) {
	  cout << "But number of cells outside tolerance does not seem to be decreasing\n";
	  n_rays_desired *= 2;
	  cout << "\tincreasing # of rays to " << n_rays_desired << endl;
	}
	else {
	  // guard the history lookups: in the first few iterations the
	  // history vectors are too short and the unsigned size()-n
	  // expressions would wrap around and index out of bounds.
	  if(old_nrays.size()>=3 &&
	     old_nrays[old_nrays.size()-3]!=n_rays_desired)
	    cout << "Increased rays within the last two iters, not checking convergence rate\n";
	  else if(old_nc_notconv.size()>=2 &&
		  old_nc_notconv[old_nc_notconv.size()-2]>=2*nc_notconv)
	    cout << "Number of unconverged cells seem to be decreasing\n";
	}
      }
      else {
	n_rays_desired *= 2;
	cout << "\tMC uncertainty is significant\n";
	cout << "\tincreasing # of rays to " << n_rays_desired << endl;
      }
       

      // we assume we are converged if the change is within tolerance
      // AND the noise is small compared to the tolerance
      converged = (nc_noisy<nc_notconv_exp && nc_noisy2<nc_notconv_exp && 
		   nc_notconv<=nc_notconv_exp && 
		   1.0*nc_common/nc_relevant>0.5);

      // update old values
      old_celldata.assign(celldata.begin(), celldata.begin()+nc_sorted);
      old_nc_notconv.push_back(nc_notconv);

      out.close();
    } // if is_mpi_master

    // update tasks with convergence state
    mpi_broadcast(n_rays_desired);
    mpi_broadcast(converged);

    ++iter;

  }
  cout << "Converged." << endl;
  ASSERT_ALL(radiation_intensity<HUGE_VAL);
  ASSERT_ALL(radiation_intensity==radiation_intensity);

  return false;
}



#endif
	
